Beta Regression

Code
library(zoib)
library(tidyverse)
library(GGally)
library(ggpubr)
library(kableExtra)
library(mice)

theme_set(theme_pubr(legend = "bottom"))


nnns_imputed<- readRDS("../Haojia_work/nnns_imputed.rds")

read imputed data

Code
dat=complete(nnns_imputed,1)
attach(dat);Percent_of_feeds_taken_by_mouth_at_discharge
  [1] 1.00 0.19 0.00 0.01 0.00 0.00 0.06 0.00 0.00 0.00 0.50 0.00 0.00 1.00 0.50
 [16] 0.02 0.00 0.00 0.29 0.00 0.00 0.01 0.14 0.30 0.06 0.41 0.95 0.01 0.17 1.00
 [31] 0.34 0.50 0.00 0.00 0.49 0.00 0.49 0.00 0.83 0.03 0.00 0.00 0.02 0.00 0.00
 [46] 0.00 1.00 1.00 0.00 0.00 0.20 0.01 0.00 1.00 0.05 0.00 0.14 0.00 0.12 1.00
 [61] 0.80 0.00 0.90 0.25 0.05 0.02 0.00 0.25 0.05 0.00 0.00 0.15 0.00 0.00 0.08
 [76] 0.00 0.03 0.30 0.23 0.01 0.50 0.09 0.10 1.00 0.44 0.80 0.50 0.22 0.15 0.00
 [91] 0.00 0.50 0.30 0.33 0.20 0.42 0.00 0.03 0.12 0.06 0.67 0.00 0.00 0.00 0.14
[106] 0.00 0.11 0.13 1.00 1.00 0.09 0.50 0.00 1.00 0.00

Use ZOIB package

ZOIB Model

\[ f(y_i|\eta_i) = \begin{cases} p_i, & y_i =0 \\ (1-p_i)q_i, & y_i =1 \\ (1 − p_i)(1 − q_i)Beta(\alpha_{i_1}, \alpha_{i_2}) &y_i \in (0, 1) \end{cases} \]

\[ \begin{aligned} logit\left(\mu^{(0,1)} = \frac{\alpha_1}{\alpha_1+ \alpha_2}\right) = \mathbf x_1 \boldsymbol \beta_1 + I_1(\mathbf z_{1,i}\gamma_{1,i}) \\ log\left(v_i = \alpha_1+ \alpha_2\right) = \mathbf x_2 \boldsymbol \beta_2 + I_2(\mathbf z_{2,i}\gamma_{2,i}) \\ logit\left(p_i\right) = \mathbf x_3 \boldsymbol \beta_3 + I_1(\mathbf z_{3,i}\gamma_{3,i}) \\ logit\left(q_i\right) = \mathbf x_4 \boldsymbol \beta_4 + I_1(\mathbf z_{4,i}\gamma_{4 ,i}) \end{aligned} \]

Fit ZOIB model

Code
set.seed(11282023) #Set seed, bayesian model!

tictoc::tic()
pre_op_model =zoib(Percent_of_feeds_taken_by_mouth_at_discharge #Response
            ~Pre_Op_NNNS_attention_score+
              Length_of_intubation_days+
              #Genetic_Syndrome_or_Chromosomal_Abnormality+
              Cardiac_Anatomy+
              Age_at_Surgery_days+
              Female
              #x1 design matrix
       | 1 #x2 design matrix- estimating variance
     |Pre_Op_NNNS_attention_score+
              Length_of_intubation_days+
              #Genetic_Syndrome_or_Chromosomal_Abnormality+
              Cardiac_Anatomy+
              Age_at_Surgery_days+
              Female #x3 design matrix
     |Pre_Op_NNNS_attention_score+
              Length_of_intubation_days+
              #Genetic_Syndrome_or_Chromosomal_Abnormality+
              Cardiac_Anatomy+
              Age_at_Surgery_days+
              Female, #x4 design matrix
     data = dat,
     n.response = 1,
     zero.inflation = TRUE,
     one.inflation = TRUE,
     link.mu = "logit",
     link.x0 = "logit",
     link.x1 =  "logit",
     random = 0,
     n.chain = 4, # number of MCMC chains
     n.iter = 3000, #number of iterations before burn/thin
     n.thin = 2, # thinning period
     n.burn = 200, # burn-in period 
     seeds = c(11,29,20,23) #vector of seeds for chains to make reproducable
     )
tictoc::toc()
saveRDS(pre_op_model, file="model1.RData")

Interpreting output:

b: vector of estimates from Eqn 1; that is, g(mu) = xb*b +z*gamma

d: vector of estimates from Eqn 2; that is, log(eta) = xd*d+z*gamma

b0: vector of estimates from Eqn 3; that is, g(p0) = x0*b0 +z*gamma

b1: vector of estimates from Eqn ;4 that is, g(p1) = x1*b1+z*gamma

Code
model1 =readRDS(file="model1.RData")

model1$coeff |> summary()

Iterations = 1:1400
Thinning interval = 1 
Number of chains = 4 
Sample size per chain = 1400 

1. Empirical mean and standard deviation for each variable,
   plus standard error of the mean:

          Mean      SD  Naive SE Time-series SE
b[1]  -0.39766 0.73827 0.0098656      0.0105416
b[2]   0.08288 0.16537 0.0022098      0.0023721
b[3]  -0.12173 0.06380 0.0008525      0.0009775
b[4]   0.50380 0.39070 0.0052210      0.0055496
b[5]  -0.22650 0.34453 0.0046040      0.0048484
b[6]  -0.01668 0.03035 0.0004056      0.0004300
b[7]  -0.43179 0.28720 0.0038379      0.0037451
b0[1] -1.78994 1.17778 0.0157387      0.0158262
b0[2] -0.16861 0.24238 0.0032390      0.0032220
b0[3]  0.28510 0.09346 0.0012489      0.0014040
b0[4]  1.51607 0.56165 0.0075053      0.0084505
b0[5]  1.12968 0.55712 0.0074448      0.0083296
b0[6] -0.04519 0.05284 0.0007061      0.0007082
b0[7] -0.64696 0.46872 0.0062635      0.0062115
b1[1] -4.38716 2.26145 0.0302199      0.0338096
b1[2]  0.61889 0.53337 0.0071275      0.0083096
b1[3] -0.38199 0.19827 0.0026495      0.0036841
b1[4] -0.20431 1.52993 0.0204446      0.0249500
b1[5] -0.84016 0.96093 0.0128409      0.0158681
b1[6]  0.17578 0.08472 0.0011321      0.0013195
b1[7]  0.72993 0.80360 0.0107386      0.0111088
d      1.01604 0.17344 0.0023177      0.0023910

2. Quantiles for each variable:

          2.5%      25%      50%       75%     97.5%
b[1]  -1.84891 -0.89203 -0.40888  0.094523  1.077142
b[2]  -0.24802 -0.02690  0.08219  0.193390  0.405817
b[3]  -0.24659 -0.16513 -0.12134 -0.078269  0.001577
b[4]  -0.28945  0.24200  0.51123  0.778278  1.232976
b[5]  -0.92601 -0.45667 -0.22729  0.006496  0.447900
b[6]  -0.07691 -0.03690 -0.01668  0.003513  0.043047
b[7]  -0.98798 -0.62709 -0.42902 -0.237098  0.138920
b0[1] -4.16669 -2.57137 -1.77618 -1.003397  0.452554
b0[2] -0.64851 -0.33305 -0.16627 -0.004057  0.301790
b0[3]  0.11778  0.22024  0.27862  0.344663  0.479738
b0[4]  0.43717  1.13710  1.50351  1.878963  2.637134
b0[5]  0.06402  0.75373  1.12702  1.494730  2.224017
b0[6] -0.15469 -0.08017 -0.04388 -0.008777  0.053560
b0[7] -1.58481 -0.95166 -0.63897 -0.323852  0.238658
b1[1] -9.15789 -5.79910 -4.28192 -2.832309 -0.350566
b1[2] -0.37062  0.25181  0.59932  0.953915  1.748874
b1[3] -0.78366 -0.51283 -0.37393 -0.245314 -0.008427
b1[4] -3.50537 -1.12108 -0.07413  0.838735  2.366966
b1[5] -2.81471 -1.45525 -0.81617 -0.178917  0.986399
b1[6]  0.02170  0.11732  0.17159  0.229341  0.351559
b1[7] -0.81516  0.18797  0.72793  1.262818  2.310501
d      0.66741  0.90080  1.02120  1.133012  1.345413

Check convergence

Traceplots

Code
traceplot(model1$coeff)

Autocorrelation plots

Code
autocorr.plot(model1$coeff)